# create UpSet plots for AST measurements and fluoroquinolone resistance mechanisms
upset_plot <- function(df, x, y) {
# df - dataframe; x - Flq resistance mechanism column; y - Laboratory typing method column
require(dplyr, ggplot2, ggupset)
# checks if lab typing method is MIC or disk diffusion
if (grepl('MIC.*$', df[y])) {
df %>%
group_by(strain, Measurement) %>%
# !! sym() allows one to pass a column name as an argument
summarize(res_mech_list = list(!! sym(x))) %>%
ggplot(aes(res_mech_list, Measurement)) + geom_violin() + geom_count() +
# EUCAST cut-off of 0.5 mg/L
geom_hline(aes(yintercept = 0.5, linetype = "EUCAST"), alpha = 0.6, color = "black") +
# CLSI cut-off of 1 mg/L
geom_hline(aes(yintercept = 1.0, linetype = "CLSI"), color = "black") +
labs(x = "Fluoroquinolone resistance mutations", y = "Ciprofloxacin MIC measurement (mg/L)",
size = "Number of strains", linetype = "MIC breakpoints") +
# convert y-axis to log2 scale and labels to 2 dp
scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 2)) +
scale_linetype_manual(values = c("dotted", "solid")) +
scale_x_upset() +
theme_combmatrix(combmatrix.panel.line.size = 0.8)
} else {
df %>%
group_by(strain, Measurement) %>%
summarize(res_mech_list = list(!! sym(x))) %>%
ggplot(aes(res_mech_list, Measurement)) + geom_violin() + geom_count() +
# EUCAST cut-off of 22mm
geom_hline(aes(yintercept = 22, linetype = "EUCAST"), alpha = 0.6, color = "black") +
# CLSI cut-off of 21mm
geom_hline(aes(yintercept = 21, linetype = "CLSI"), color = "black") +
labs(title = "Disk diffusion measurements", x =
"Fluoroquinolone resistance mutations", y = "Ciprofloxacin disk diffusion measurement (mm)",
size = "Number of strains", linetype = "Disk diffusion breakpoints") +
scale_linetype_manual(values = c("dotted", "solid")) +
scale_x_upset() +
theme_combmatrix(combmatrix.panel.line.size = 0.8)
}
}library(here)
library(tidyverse)
library(scales)
library(ggupset)
library(RColorBrewer)
library(collapse)
library(DT)# read antibiogram data
antibiogram <- read.delim(file = "antibiogram_combined.tsv",
header = TRUE, sep = "\t")
# filter for ciprofloxacin
cipro_antibiogram <- antibiogram %>%
filter(Antibiotic == "ciprofloxacin") %>%
select(strain:ST, Flq_mutations, Flq_acquired, Omp_mutations, Antibiotic:Measurement.Round) %>%
mutate(Resistance.phenotype = case_when(grepl('MIC.*$', Laboratory.Typing.Method) & Measurement <= 0.25 ~
"susceptible",
grepl('MIC.*$', Laboratory.Typing.Method) & Testing.standard ==
"EUCAST" & Measurement > 0.5 ~ "resistant",
grepl('MIC.*$', Laboratory.Typing.Method) & Testing.standard ==
"CLSI" & Measurement >= 1 ~ "resistant",
Laboratory.Typing.Method == "Disk diffusion" & Testing.standard ==
"EUCAST" & Measurement >= 25 ~ "susceptible",
Laboratory.Typing.Method == "Disk diffusion" & Testing.standard ==
"EUCAST" & Measurement < 22 ~ "resistant",
Laboratory.Typing.Method == "Disk diffusion" & Testing.standard ==
"CLSI" & Measurement >= 26 ~ "susceptible",
Laboratory.Typing.Method == "Disk diffusion" & Testing.standard ==
"CLSI" & Measurement <= 21 ~ "resistant",
TRUE ~ "resistant"))
#write_delim(cipro_antibiogram, file = "cipro_antibiogram.tsv", delim = "\t")
# combine antibiogram with mutations not in kleborate data file
no_mut_kleborate <- read.delim(file = "mutations_not_in_kleborate.tsv",
header = TRUE, sep = "\t")
cipro_antibiogram <- cipro_antibiogram %>%
left_join(no_mut_kleborate, by = c("strain")) %>%
mutate(Flq_mutations = case_when(Flq_mutations.y %in% NA ~ Flq_mutations.x,
Flq_mutations.x == "-" & Flq_mutations.y %in% NA ~ Flq_mutations.x,
TRUE ~ Flq_mutations.y)) %>%
select(- c(Flq_mutations.x, Flq_mutations.y)) %>%
relocate(Flq_mutations, .after = ST)cipro_antibiogram %>%
group_by(index) %>%
ggplot(aes(x = index)) +
geom_bar(fill = "#ff8000", alpha = 0.6) +
geom_text(aes(label = ..count..), stat = "count", nudge_y = 60) +
labs(title = "Total sample count per dataset", y = "Total sample count", fill = "Lab typing method") +
theme(axis.title.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1))cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
ggplot(aes(Measurement, fill = index)) + geom_bar() +
# EUCAST cut-off of 22mm
geom_vline(aes(xintercept = 22, linetype = "EUCAST"), color = "black") +
# CLSI cut-off of 21mm
geom_vline(aes(xintercept = 21, linetype = "CLSI"), color = "black") +
labs(title = "Disk Diffusion Measurements", x = "Measurement (mm)", y = "Frequency", fill = "Dataset",
linetype = "Disk diffusion \nbreakpoints") +
scale_linetype_manual(values = c("dotted", "solid"))cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
ggplot(aes(x = factor(Measurement), fill = index)) + geom_bar() +
# EUCAST cut-off of 0.5 mg/L
geom_vline(aes(xintercept = 6, linetype = "EUCAST"), alpha = 0.6, color = "black") +
# CLSI cut-off of 1 mg/L
geom_vline(aes(xintercept = 8, linetype = "CLSI"), color = "black") +
labs(title = "MIC Broth Dilution Measurements", x = "Measurement (mg/L)", y = "Frequency", fill = "Dataset",
linetype = "MIC breakpoints") +
scale_linetype_manual(values = c("dotted", "solid")) cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method == "MIC (agar dilution)") %>%
ggplot(aes(x = factor(Measurement), fill = index)) + geom_bar() +
# EUCAST cut-off of 0.5 mg/L
geom_vline(aes(xintercept = 5, linetype = "EUCAST"), alpha = 0.6, color = "black") +
# CLSI cut-off of 1 mg/L
geom_vline(aes(xintercept = 6, linetype = "CLSI"), color = "black") +
labs(title = "MIC Agar Dilution Measurements", x = "Measurement (mg/L)", y = "Frequency", fill = "Dataset",
linetype = "MIC breakpoints") +
# round off x-axis labels to 2dp
scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
scale_linetype_manual(values = c("dotted", "solid")) # combine Flq_mutations, Flq_acquired and Omp_mutations into one column
cipro_res_mech <- cipro_antibiogram %>%
pivot_longer(Flq_mutations:Omp_mutations, names_to = "Flq.resistance.mech",
values_to = "Resistance.mech.type") %>%
separate(Resistance.mech.type, into = c("type_A", "type_B", "type_C"), sep = "[;]")
cipro_res_mech <- cipro_res_mech %>%
pivot_longer(cols = names(cipro_res_mech)[26:28], values_to = "Resistance.mech.type") %>%
drop_na(Resistance.mech.type) %>%
select(-name)
rare_res_qnr_genes <- c("qnrB7", "qnrE2", "qnrS2", "qnrVC1", "qnrVC6")
# now label all OmpK35 truncations as such, e.g. "OmpK35-21%" -> "OmpK35-trunc"
# as these all reflect the same mechanism (i.e. truncation/loss of the porin)
cipro_res_mech <- cipro_res_mech %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, grepl("OmpK35-", Resistance.mech.type), "OmpK35-trunc")) %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, grepl("OmpK36-", Resistance.mech.type), "OmpK36-trunc")) %>%
# based on statistical analysis (chunk prop_test), combine QRDR mutations to have just 4 types (GyrA-83, GyrA-87, ParC-80, ParC-84)
# except GyrA-87H which is found in one strain and is sensitive
mutate(Resistance.mech.type = replace(Resistance.mech.type, startsWith(Resistance.mech.type, "GyrA-83"), "GyrA-83")) %>%
mutate(Resistance.mech.type = case_when(startsWith(Resistance.mech.type, "GyrA-87") & Resistance.mech.type != "GyrA-87H" ~ "GyrA-87",
TRUE ~ Resistance.mech.type)) %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, startsWith(Resistance.mech.type, "ParC-80"), "ParC-80")) %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, startsWith(Resistance.mech.type, "ParC-84"), "ParC-84")) %>%
# also combine the following acquired genes
mutate(Resistance.mech.type = replace(Resistance.mech.type, startsWith(Resistance.mech.type, "qepA2"), "qepA2")) %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, startsWith(Resistance.mech.type, "qnrA1"), "qnrA1")) %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, startsWith(Resistance.mech.type, "qnrB1"), "qnrB1")) %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, startsWith(Resistance.mech.type, "qnrB19"), "qnrB19")) %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, startsWith(Resistance.mech.type, "qnrB2"), "qnrB2")) %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, startsWith(Resistance.mech.type, "qnrS1"), "qnrS1")) %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, grepl(paste(rare_res_qnr_genes, collapse = "|"), Resistance.mech.type), "rare_res_qnr"))
# explicitly label strains with wildtype QRDR (gyrA/parC), wt porin (Omp), and no acquired genes as such
cipro_res_mech <- cipro_res_mech %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, Flq.resistance.mech=="Flq_mutations" & Resistance.mech.type=="-", "wt_QRDR")) %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, Flq.resistance.mech=="Omp_mutations" & Resistance.mech.type=="-", "wt_Omp")) %>%
mutate(Resistance.mech.type = replace(Resistance.mech.type, Flq.resistance.mech=="Flq_acquired" & Resistance.mech.type=="-", "no_acquired_genes"))# count number of occurrences of the different fluoroquinolone resistance determinants
summ_res_mech <- cipro_res_mech %>%
filter(Resistance.mech.type != "wt_Omp" & Resistance.mech.type != "wt_QRDR" &
Resistance.mech.type != "no_acquired_genes") %>%
group_by(Flq.resistance.mech, Resistance.mech.type) %>%
summarise(count = n())
DT::datatable(summ_res_mech,
width = "50%",
extensions = "FixedHeader",
rownames = FALSE,
options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))summ_res_mech %>%
ggplot(aes(Resistance.mech.type, count, fill = Flq.resistance.mech)) +
geom_bar(stat = "identity") +
geom_text(aes(label = count), stat = "identity", nudge_y = 60) +
labs(title = "Total sample count per resistance type", x = "Resistance mechanism",
y = "Total sample count", fill = "Fluoroquinolone resistance \nmechanism type") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_brewer(palette = "Accent")cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
upset_plot("Resistance.mech.type", "Laboratory.Typing.Method") +
labs(title = "MIC (broth dilution) measurements") cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "MIC (agar dilution)") %>%
upset_plot("Resistance.mech.type", "Laboratory.Typing.Method") +
labs(title = "MIC (agar dilution) measurements") cipro_res_mech %>%
filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
upset_plot("Resistance.mech.type", "Laboratory.Typing.Method")# count STs with resistant phenotype
top_35_st_res <- cipro_antibiogram %>%
filter(Resistance.phenotype == "resistant") %>%
filter(Flq_mutations != "-" | Flq_acquired != "-" | Omp_mutations != "-") %>%
group_by(ST) %>%
summarise(sample_count = n()) %>%
as.data.frame() %>%
arrange(desc(sample_count)) %>%
head(35) %>%
mutate(ST = factor(ST, levels = ST, ordered = TRUE))
DT::datatable(top_35_st_res,
width = "50%",
extensions = "FixedHeader",
rownames = FALSE,
options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))# Blues palette with 9 colors
colour_palette <- brewer.pal(9, "Blues")
# Add more colors to this palette :
colour_palette <- colorRampPalette(colour_palette)(35)
top_35_st_res %>%
ggplot(aes(y = ST, x = sample_count, fill = ST, label = sample_count)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(hjust = -0.5) +
scale_y_discrete(limits = rev) +
ggtitle("Top STs with fluoroquinolone resistance") +
labs(x = "Sample count") +
# reverse order of colour palette
scale_fill_manual(values = rev(colour_palette)) +
theme_minimal() +
theme(legend.position = "none",
axis.text = element_text(size = 10)) +
coord_cartesian(clip = "off")st_totals <- cipro_antibiogram %>%
group_by(ST) %>%
summarise(N=n())
st_res_mech <- cipro_res_mech %>%
filter(Resistance.phenotype == "resistant") %>%
filter(Resistance.mech.type != "wt_QRDR", Resistance.mech.type != "wt_Omp",
Resistance.mech.type != "no_acquired_genes") %>%
count(ST, Resistance.mech.type) %>%
left_join(st_totals) %>% # ST total counts
mutate(prop = n/N) %>% # proportion per ST
mutate(se = sqrt(prop*(1-prop)/N)) %>%
mutate(p_lowerCI = prop-1.96*se, p_upperCI = prop+1.96*se) %>%
mutate(p_lowerCI = replace(p_lowerCI, p_lowerCI<0, 0)) %>%
mutate(p_upperCI = replace(p_upperCI, p_upperCI>1, 1)) %>%
select(-se) %>%
mutate(ST = factor(ST, levels = unique(ST)))
DT::datatable(st_res_mech,
extensions = "FixedHeader",
rownames = FALSE,
options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))st_res_mech %>%
filter(ST %in% st_res_mech$ST[st_res_mech$prop>0.1 & st_res_mech$N>50]) %>%
ggplot(aes(x=ST, y = Resistance.mech.type)) +
geom_tile(aes(fill=prop)) +
labs(subtitle = "Fluoroquinolone resistance mechanisms in top resistant STs",
y = "Fluoroquinolone resistance mechanisms", fill = "Proportion per \nST") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8, colour = "black")) +
scale_fill_gradient(low = "white", high = "red")st_res_mech %>%
filter(ST %in% st_res_mech$ST[st_res_mech$prop>0.1 & st_res_mech$N>50]) %>%
select(- c("p_lowerCI", "p_upperCI")) %>%
group_by(ST, prop) %>%
summarize(res_mech_list = list(Resistance.mech.type)) %>%
ggplot(aes(res_mech_list, ST, fill = prop)) + geom_tile() +
labs(title = "Proportion of fluoroquinolone resistance determinants per ST",
x = "Fluoroquinolone resistance determinants", y = "ST", fill = "Proportion per\nST") +
scale_x_upset() +
theme_combmatrix(combmatrix.panel.line.size = 0.8) +
scale_fill_gradient(low = "white", high = "red")no_res_mech <- cipro_antibiogram %>%
filter(Flq_mutations == "-", Flq_acquired == "-", Omp_mutations == "-") %>%
filter(Resistance.phenotype == "resistant")
st_no_res_mech <- no_res_mech %>%
group_by(ST) %>%
summarise(sample_count = n()) %>%
as.data.frame() %>%
arrange(desc(sample_count)) %>%
mutate(ST = factor(ST, levels = ST, ordered = TRUE))
DT::datatable(st_no_res_mech,
width = "50%",
extensions = "FixedHeader",
rownames = FALSE,
options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))st_no_res_mech %>%
filter(sample_count > 2) %>%
ggplot(aes(ST, sample_count)) +
geom_bar(stat = "identity", fill = "blue", alpha = 0.7) +
labs(title = "Resistant STs with no fluoroquinolone resistance mechanisms", y = "Count") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))sus_res_mech <- cipro_res_mech %>%
filter(Resistance.phenotype == "susceptible") %>%
filter(Resistance.mech.type != "wt_QRDR", Resistance.mech.type != "wt_Omp",
Resistance.mech.type != "no_acquired_genes")
st_sus_res_mech <- sus_res_mech %>%
group_by(ST) %>%
summarise(res_mech = Resistance.mech.type) %>%
as.data.frame() %>%
count(ST, res_mech, sort = TRUE) %>%
mutate(ST = factor(ST, levels = unique(ST)))
DT::datatable(st_sus_res_mech,
width = "50%",
extensions = "FixedHeader",
rownames = FALSE,
options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))st_sus_res_mech %>%
filter(n != 1) %>%
ggplot(aes(ST, n, fill = res_mech)) +
geom_bar(stat = "identity") +
labs(title = "Fluoroquinolone resistance mechanisms employed by the top susceptible STs", y = "Count", fill = "Fluoroquinolone resistance\nmechanisms") +
# put legend in one column
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))sus_res_mech %>%
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
upset_plot("Resistance.mech.type", "Laboratory.Typing.Method")sus_res_mech %>%
filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
upset_plot("Resistance.mech.type", "Laboratory.Typing.Method") +
labs(title = "MIC (broth dilution) measurements") sus_res_mech %>%
filter(Laboratory.Typing.Method == "MIC (agar dilution)") %>%
upset_plot("Resistance.mech.type", "Laboratory.Typing.Method") +
labs(title = "MIC (agar dilution) measurements") # calculate frequencies of different resistance mechanisms for the different phenotypes
# including numbers for the wildtype groups for comparison to compare resistance frequency
# amongst strains with the determinant vs strains without
mech_freq <- cipro_res_mech %>%
group_by(Resistance.mech.type, Resistance.phenotype) %>%
summarise(n = n()) %>%
ungroup() %>%
group_by(Resistance.mech.type) %>%
pivot_wider(names_from = Resistance.phenotype, values_from = n)
# convert na values to 0
mech_freq[is.na(mech_freq)] <- 0
# find totals of different resistance mechanisms
mech_freq <- mech_freq %>%
mutate(total = sum(c(resistant, susceptible)))
# compare each gyrA/parC SNP vs wildtype QRDR
qrdr_wt <- mech_freq %>% filter(Resistance.mech.type == "wt_QRDR")
qrdr_mut <- mech_freq %>%
filter(startsWith(Resistance.mech.type, "GyrA") | startsWith(Resistance.mech.type, "ParC")) %>%
mutate(wt_res = qrdr_wt[2], wt_total=qrdr_wt[4]) # add columns with the wildtype counts for res & total
# run prop.test() to each row, comparing the resistant counts (x) with the total counts (n)
qrdr_prop_out <- dapply(as.matrix(qrdr_mut), MARGIN = 1,
FUN = function(y) broom::tidy(prop.test(x=c(as.numeric(y[2]), as.numeric(y[5])), n=c(as.numeric(y[4]), as.numeric(y[6])))))
# combine qrdr_mut and qrdr_prop_out
qrdr_mut_freq_prop <- cbind(qrdr_mut, qrdr_prop_out) %>%
select(resistant, total, estimate1, estimate2, p.value) %>%
mutate(p.value=as.numeric(p.value))
qrdr_mut_freq_prop
# specific mutations observed more than once, significantly associated with elevated resistance (vs wildtype QRDR)
view(qrdr_mut_freq_prop %>% filter(total>1) %>% filter(p.value < 0.05))
# the data provides evidence that ANY mutation at the 4 codons we track is associated with resistance, even though for some individual mutations we only have one observation:
# check gyrA83, there are 5 mutations observed at this codon (A, F, I, L, Y)
# 4 have large numbers (≥10) and significant p-values, gyrA-83A is observed only once but is also resistant
qrdr_mut_freq_prop %>% filter(startsWith(Resistance.mech.type,"GyrA-83"))
# Similar for GyrA-87, except that GyrA87-H is only seen in one strain and it is sensitive, so exclude this one (not a borderline phenotype)
qrdr_mut_freq_prop %>% filter(startsWith(Resistance.mech.type,"GyrA-87"))
# ParC-80 and ParC-84 look the same as for GyrA-83, ie all mutations at the site have same effect can be grouped together
qrdr_mut_freq_prop %>% filter(startsWith(Resistance.mech.type,"ParC-80"))
qrdr_mut_freq_prop %>% filter(startsWith(Resistance.mech.type,"ParC-84"))
# based on the statistical analysis, QRDR mutations will be updated to have just 4 types
# (GyrA-83, GyrA-87, ParC-80, ParC-84) instead of 19 most of which are rare
# compare each Omp mutations vs wildtype Omp
omp_wt <- mech_freq %>% filter(Resistance.mech.type == "wt_Omp")
omp_mut <- mech_freq %>%
filter(startsWith(Resistance.mech.type, "Omp")) %>%
mutate(wt_res = omp_wt[2], wt_total=omp_wt[4]) # add columns with the wildtype counts for res & total
# run prop.test() to each row, comparing the resistant counts (x) with the total counts (n)
omp_prop_out <- dapply(as.matrix(omp_mut), MARGIN = 1,
FUN = function(y) broom::tidy(prop.test(x=c(as.numeric(y[2]), as.numeric(y[5])), n=c(as.numeric(y[4]), as.numeric(y[6])))))
omp_mut_freq_prop <- cbind(omp_mut, omp_prop_out) %>%
select(resistant, total, estimate1, estimate2, p.value) %>%
mutate(p.value=as.numeric(p.value))
# all four mutations (truncation of either ompK35 or ompK36; and insertion of GD or TD in OmpK36, are all significantly associated with resistance)
view(omp_mut_freq_prop)
# compare acquired vs no acquired genes
no_acquired <- mech_freq %>% filter(Resistance.mech.type == "no_acquired_genes")
acquired_genes <- mech_freq %>%
filter(startsWith(Resistance.mech.type, "qnr") | startsWith(Resistance.mech.type, "qep") ) %>%
mutate(wt_res = no_acquired[2], wt_total=no_acquired[4]) # add columns with the wildtype counts for res & total
# run prop.test() to each row, comparing the resistant counts (x) with the total counts (n)
acquired_genes_prop_out <- dapply(as.matrix(acquired_genes), MARGIN = 1,
FUN = function(y) broom::tidy(prop.test(x=c(as.numeric(y[2]), as.numeric(y[5])), n=c(as.numeric(y[4]), as.numeric(y[6])))))
acquired_genes_freq_prop <- cbind(acquired_genes, acquired_genes_prop_out) %>%
select(resistant, total, estimate1, estimate2, p.value) %>%
mutate(p.value=as.numeric(p.value))
view(acquired_genes_freq_prop)
# from the statistical analysis, the following can be combined:
# qepA2 (n=1, which is resistant) with qepA2* (n=17, all of which are resistant)
# qnrA1 and qnrA1^ (note the ^ means they are identical at protein level anyway) as all are resistant
# qnrB1.v1, qnrB1.v2, qnrB1.v2^ for similar reasons
# qnrB19, qnrB19^
# qnrB2.1, qnrB2.2
# qnrS1, qnrS1?, qnrS1*
# could also combine all the 'rare' genes that are only seen 1-2 times each but are always resistant:
# -> qnrB7, qnrE2, qnrS2, qnrVC1, qnrVC6sessionInfo()## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DT_0.26.1 collapse_1.8.9 RColorBrewer_1.1-3 ggupset_0.3.0
## [5] scales_1.2.1 forcats_0.5.2 stringr_1.4.1 dplyr_1.0.10
## [9] purrr_0.3.5 readr_2.1.3 tidyr_1.2.1 tibble_3.1.8
## [13] ggplot2_3.4.0 tidyverse_1.3.2 here_1.0.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.9 lubridate_1.9.0 assertthat_0.2.1
## [4] rprojroot_2.0.3 digest_0.6.30 utf8_1.2.2
## [7] R6_2.5.1 cellranger_1.1.0 backports_1.4.1
## [10] reprex_2.0.2 evaluate_0.18 highr_0.9
## [13] httr_1.4.4 pillar_1.8.1 rlang_1.0.6
## [16] googlesheets4_1.0.1 readxl_1.4.1 rstudioapi_0.14
## [19] jquerylib_0.1.4 rmarkdown_2.18 labeling_0.4.2
## [22] googledrive_2.0.0 htmlwidgets_1.5.4 munsell_0.5.0
## [25] broom_1.0.1 compiler_4.2.2 modelr_0.1.10
## [28] xfun_0.35 pkgconfig_2.0.3 htmltools_0.5.3
## [31] tidyselect_1.1.2 fansi_1.0.3 crayon_1.5.2
## [34] tzdb_0.3.0 dbplyr_2.2.1 withr_2.5.0
## [37] grid_4.2.2 jsonlite_1.8.3 gtable_0.3.0
## [40] lifecycle_1.0.3 DBI_1.1.3 magrittr_2.0.3
## [43] cli_3.4.1 stringi_1.7.8 cachem_1.0.6
## [46] farver_2.1.1 fs_1.5.2 xml2_1.3.3
## [49] bslib_0.4.1 ellipsis_0.3.2 generics_0.1.2
## [52] vctrs_0.5.1 tools_4.2.2 glue_1.6.2
## [55] crosstalk_1.2.0 hms_1.1.2 parallel_4.2.2
## [58] fastmap_1.1.0 yaml_2.3.6 timechange_0.1.1
## [61] colorspace_2.0-3 gargle_1.2.1 rvest_1.0.3
## [64] knitr_1.41 haven_2.5.1 sass_0.4.3